Apresentação

Esse documento traz os resultados e códigos utilizados nas análises dos fatores que contribuem para a expansão dos casos de covid-19 no estado da Bahia. Esse estudo foi realizado pelo grupo de estudos em ecologia espacial da UFBA, incluindo diversos pesquisadores e laboratórios do Instituo de Biologia.

Pacotes

Se for seguir o código para recriar as análises, antes de inciar, carregue e instale os seguintes pacotes.

library(coronabr) # pode baixar aqui: https://github.com/liibre/coronabr
library(tidyverse)
library(car)
library(randomForest)
library(rgdal) #load map
library(sp) #plot maps

Baixar os dados para a Bahia

Com o código abaixo podemos baixar os dados para todos os municipios Bahia. Para saber mais sobre as fontes do dados acesse o seguinte link: https://github.com/liibre/coronabr.

covid <- as_tibble(get_corona_br(uf = "BA")) 

Pequenos ajustes na tabela:

covid <- covid %>% 
  filter(place_type == "city") %>% 
  mutate(city = factor(city, levels = unique(city)))

Dados por municipio:

mun_covid <- covid %>%  
  filter(date == date[1]) %>% 
  mutate(afetados = ifelse(confirmed > 0, 1, 0))

Estatísticas dos casos na Bahia:

mun_covid %>%  
  summarise("Casos totais" = sum(confirmed), 
            "Mortes totais" = sum(deaths),
            "Número de municipios afetados" = sum(confirmed > 0))

Causas que afetam a ocorrência de COVID-19

Carregando os dados do IBGE e mapbiomas:

ibge <- read_csv2(file("Data/new_ibge.csv", encoding="UTF-8")) %>% 
  separate(cidade, c("Cidade", "Estado"), sep = "\\(") %>% 
  mutate(Estado = str_remove(Estado, "\\)")) %>% 
   filter(Estado == "BA")
## Warning: Missing column names filled in: 'X1' [1]
federal <- read_csv2("Data/federal_w_codes.csv")
## Warning: Missing column names filled in: 'X1' [1]
centralidade <- read_csv2("Data/new.dat.ba.csv")
## Warning: Missing column names filled in: 'X1' [1]
clima <- read_csv2("Data/climatic.br.csv") %>% 
  mutate(precTotal = rowSums(.[3:7]),
         tmean = apply(.[8:17], 1, mean))
## Warning: Missing column names filled in: 'X1' [1]
meso <- read_csv('Data/meso.csv')
colnames(meso)[1] <- 'mesoregiao'
aero <- read_csv2('Data/main.air.ba.csv')
## Warning: Missing column names filled in: 'X1' [1]

Juntando as tabelas:

munis <- left_join(ibge, centralidade, by = c("cod_ibge" = "ibgecode")) %>% 
  left_join(federal, by = c("cod_ibge" = "ibge")) %>% 
  left_join(mun_covid, by = c("cod_ibge" = "city_ibge_code"))  %>% 
  left_join(clima, by = c("cod_ibge" = "ibge")) %>% 
  left_join(meso, by = c("cod_ibge" = "code")) %>% 
  left_join(aero, by = c("cod_ibge" = "ibge")) %>% 

  mutate(dens.road = ifelse(is.na(dens.road), 0, dens.road),
         afetados = ifelse(is.na(afetados), 0, afetados),
         airport = ifelse(is.na(airport), "NO", airport),
         confirmed = ifelse(is.na(confirmed), 0, confirmed),
         confirmed_per_100k_inhabitants = ifelse(is.na(confirmed_per_100k_inhabitants), 0, 
                                                 confirmed_per_100k_inhabitants),
         tam_pop_urb = total.pop * (1 - perc.rural))

Modelo logístico correlacionando as variáveis com presença ou ausência do vírus. As variáveis foram escolhidas visando diminuir a correlação entre elas e manter a expectativa teorica.

reg_log <- glm(afetados ~ 
                 airport + log(total.pop) + perc.rural +
                 log(eingen.cen.dist) + school.year +
                 perc.with.wages + dist.min +
                 tmean + log(precTotal),
               data = munis,
               family = binomial)

Checar a inflação do modelo:

# VIF
vif(reg_log)
##              airport       log(total.pop)           perc.rural 
##             1.275313             1.493799             1.584156 
## log(eingen.cen.dist)          school.year      perc.with.wages 
##             1.330059             1.955988             1.147539 
##             dist.min                tmean       log(precTotal) 
##             1.505226             1.137059             1.501329

Resultado do modelo:

resu <- summary(reg_log)

Adicionado por Anderson (20/04/20)

cor(munis$perc.rural, munis$total.pop)
## [1] -0.2140484
cor(munis$tam_pop_urb, munis$total.pop)
## [1] 0.9987153
cor(munis$tam_pop_urb, munis$perc.rural)
## [1] -0.2287015

Modelo logístico com novas variáveis:

reg_log2 <- glm(afetados ~ 
            airport + log(total.pop)+ perc.rural +
              log(eingen.cen.dist) + school.year +
              perc.with.wages + dist.min.ilh.ssa +
              log(precTotal) + tmean,
            data = munis,
            family = binomial)

Checar a inflação do modelo:

# VIF
vif(reg_log2)
##              airport       log(total.pop)           perc.rural 
##             1.220213             1.660506             1.669023 
## log(eingen.cen.dist)          school.year      perc.with.wages 
##             1.502610             1.974139             1.142408 
##     dist.min.ilh.ssa       log(precTotal)                tmean 
##             1.324672             1.204350             1.161880

Resultado do modelo:

resu2 <- summary(reg_log2)

Modelo logístico com novas variáveis: incluindo renda mensal (month.wages) e removendo variáveis para diminuir o VIF

reg_log3 <- glm(afetados ~ 
            airport + dist.min.ilh.ssa +
            log(eingen.cen.dist) + 
            perc.with.wages  + month.wages +
            log(precTotal) + tmean,
            data = munis,
            family = binomial)

Checar a inflação do modelo:

# VIF
vif(reg_log3)
##              airport     dist.min.ilh.ssa log(eingen.cen.dist) 
##             1.279615             1.131919             1.265554 
##      perc.with.wages          month.wages       log(precTotal) 
##             1.095174             1.657746             1.320686 
##                tmean 
##             1.097639
resu3 <- summary(reg_log3)

Modelo logístico apenas com mesorregiões:

reg_log4 <- glm(afetados ~ 
            mesoregiao,
            data = munis,
            family = binomial)

Na verdade, um qui-quadrado gourmet:

resu4 <- summary(reg_log4)

Modelo logístico apenas com estrutura da rede de transporte:

sim.roles<-ifelse(munis$roles=="network hub", "hub", munis$roles)

reg_log5 <- glm(afetados ~ 
            log(eingen.cen.dist)+sim.roles+as.factor(module),
            data = munis,
            family = binomial)

Na verdade, um qui-quadrado gourmet:

resu5 <- summary(reg_log5)

Comparando os modelos com AIC

aic.log <- AIC(reg_log,reg_log2,reg_log3, reg_log4, reg_log5)
## Warning in AIC.default(reg_log, reg_log2, reg_log3, reg_log4, reg_log5): models
## are not all fitted to the same number of observations
aic.log

Recalcular os valores de p por monte carlo, aleatorizando a variavel “afetados”. O modelo escolhido foi o reg_log2 com base no valor de AIC.

rept <- 1000
obs_z <- summary(reg_log2)$coefficients[, 3]
obs <- coefficients(reg_log)
zs <- coefs <- matrix(ncol = length(obs), nrow = rept)
colnames(zs) <- colnames(coefs) <- names(obs)
for (i in 1:rept) {
  munis$rnd_afetados <- sample(munis$afetados)
  reg_log <- glm(afetados ~ 
            airport + log(total.pop)+ perc.rural +
              log(eingen.cen.dist) + school.year +
              perc.with.wages + dist.min.ilh.ssa +
              log(precTotal) + tmean,
            data = munis,
            family = binomial)
  zs[i, ] <- summary(reg_log)$coefficients[, 3]
  coefs[i, ] <- coefficients(reg_log)
}

for (j in 1:length(obs)) {
  maior <- (sum(obs[j] >= coefs[, j]) + 1 ) / (rept + 1) * 2
  menor <- (sum(obs[j] <= coefs[, j]) + 1) / (rept + 1) * 2
  resu$coefficients[j, 4] <- ifelse(maior > menor, menor, maior)
}
resu
## 
## Call:
## glm(formula = afetados ~ airport + log(total.pop) + perc.rural + 
##     log(eingen.cen.dist) + school.year + perc.with.wages + dist.min + 
##     tmean + log(precTotal), family = binomial, data = munis)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0148  -0.7563  -0.5121   0.7901   2.4703  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)   
## (Intercept)          -1.961e+01  5.313e+00  -3.692    0.002 **
## airportYES           -1.027e+00  7.215e-01  -1.423    0.002 **
## log(total.pop)        9.928e-01  2.223e-01   4.466    0.002 **
## perc.rural           -2.896e+00  8.108e-01  -3.571    0.002 **
## log(eingen.cen.dist)  1.428e-01  9.322e-02   1.532    0.002 **
## school.year          -2.032e-01  2.626e-01  -0.774    0.002 **
## perc.with.wages       1.267e+00  2.813e+00   0.451    0.002 **
## dist.min              8.151e-05  3.152e-03   0.026    0.002 **
## tmean                 1.458e-01  1.125e-01   1.297    0.002 **
## log(precTotal)        1.296e+00  5.062e-01   2.559    0.002 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 508.37  on 404  degrees of freedom
## Residual deviance: 405.50  on 395  degrees of freedom
##   (12 observations deleted due to missingness)
## AIC: 425.5
## 
## Number of Fisher Scoring iterations: 4

As simulações de monte carlo confirmam os resultados anteriores.

CONCLUSÃO: O modelo indica que municípios com uma população grande e urbana, têm maior probabilidade de serem afetados. Bem como municipios com mais chuva durante os últimos 4 meses.

munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>% 
ggplot(aes(x = afetados ,
           y = total.pop)) +
  geom_violin() +
  geom_jitter(alpha = .5) +
  scale_y_log10() +
  xlab("Município afetado") +
  ylab("Tamanho população")

munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>% 
ggplot(aes(x = afetados ,
           y = perc.rural)) +
  geom_violin() +
  geom_jitter(alpha = .5) +
  xlab("Município afetado") +
  ylab("Porcentagem população rural")

munis %>% mutate(afetados = ifelse(afetados == 1, "Sim", "Não")) %>% 
ggplot(aes(x = afetados,
           y = precTotal)) +
  geom_violin() +
  geom_jitter(alpha = .5) +
  scale_y_log10() +
  xlab("Município afetado") +
  ylab("Precipitação")

Correlacao com numero de casos

Principais fatores contribuindo para a quantidade de casos, nas cidades afetadas.

reg_N <- lm(log(confirmed_per_100k_inhabitants+1) ~ 
            airport + log(total.pop) + perc.rural +
            log(eingen.cen.dist) + school.year +
            perc.with.wages + dist.min +
              precTotal + tmean,
            data = filter(munis, confirmed > 0))
vif(reg_N)
##              airport       log(total.pop)           perc.rural 
##             1.468365             3.002980             2.164975 
## log(eingen.cen.dist)          school.year      perc.with.wages 
##             1.784824             3.494782             1.293024 
##             dist.min            precTotal                tmean 
##             1.831519             1.735719             1.224695
resu <- summary(reg_N)
resu 
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport + 
##     log(total.pop) + perc.rural + log(eingen.cen.dist) + school.year + 
##     perc.with.wages + dist.min + precTotal + tmean, data = filter(munis, 
##     confirmed > 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.33924 -0.60499 -0.08731  0.53679  2.34398 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           6.8014874  2.4320154   2.797  0.00602 **
## airportYES            0.3294023  0.3587640   0.918  0.36038   
## log(total.pop)       -0.3350959  0.1313915  -2.550  0.01202 * 
## perc.rural           -1.0643024  0.5243482  -2.030  0.04459 * 
## log(eingen.cen.dist) -0.0458574  0.0672630  -0.682  0.49670   
## school.year           0.0812434  0.1574984   0.516  0.60692   
## perc.with.wages      -0.5236581  2.0210922  -0.259  0.79600   
## dist.min             -0.0019814  0.0020672  -0.958  0.33974   
## precTotal             0.0008158  0.0006226   1.310  0.19259   
## tmean                -0.0568844  0.0710695  -0.800  0.42506   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8569 on 120 degrees of freedom
## Multiple R-squared:  0.2089, Adjusted R-squared:  0.1496 
## F-statistic: 3.522 on 9 and 120 DF,  p-value: 0.0006673

O resultado preliminar aponta para um efeito negativo da população sobre o número de casos a cada 100 mil habitantes, porém pode ser apenas um efeito matemático. Para avaliar isso, recalculamos abaixo os valore de p usando uma simulação da Monte Carlo, onde o numero de casos foi aleatorizado e foi recalculada o número de casos por 100k habitantes.

rept <- 1000
obs <- coef(reg_N)
coefs <- matrix(ncol = length(obs), nrow = rept)
colnames(coefs) <- names(obs)
for (i in 1:rept) {
 munis$rnd_cases <- (sample(munis$confirmed) / munis$total.pop) * 1000
  while(sum(munis$airport[munis$rnd_cases > 0] == "YES") < 2) {
    munis$rnd_cases <- (sample(munis$confirmed) / munis$total.pop) * 1000
  }
  reg_N <- lm(log(rnd_cases+1) ~ 
              airport + log(total.pop) + perc.rural +
              log(eingen.cen.dist) + school.year +
              perc.with.wages + dist.min +
              precTotal + tmean,
              data = filter(munis, rnd_cases > 0))
  coefs[i, ] <- coef(reg_N)
}

for (j in 1:length(obs)) {
  maior <- (sum(obs[j] >= coefs[, j]) + 1 ) / (rept + 1) * 2
  menor <- (sum(obs[j] <= coefs[, j]) + 1) / (rept + 1) * 2
  resu$coefficients[j, 4] <- ifelse(maior > menor, menor, maior)
}
resu
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport + 
##     log(total.pop) + perc.rural + log(eingen.cen.dist) + school.year + 
##     perc.with.wages + dist.min + precTotal + tmean, data = filter(munis, 
##     confirmed > 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.33924 -0.60499 -0.08731  0.53679  2.34398 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           6.8014874  2.4320154   2.797   0.0020 **
## airportYES            0.3294023  0.3587640   0.918   0.1918   
## log(total.pop)       -0.3350959  0.1313915  -2.550   0.0480 * 
## perc.rural           -1.0643024  0.5243482  -2.030   0.0020 **
## log(eingen.cen.dist) -0.0458574  0.0672630  -0.682   0.2298   
## school.year           0.0812434  0.1574984   0.516   0.5734   
## perc.with.wages      -0.5236581  2.0210922  -0.259   0.5754   
## dist.min             -0.0019814  0.0020672  -0.958   0.1119   
## precTotal             0.0008158  0.0006226   1.310   0.0599 . 
## tmean                -0.0568844  0.0710695  -0.800   0.2178   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8569 on 120 degrees of freedom
## Multiple R-squared:  0.2089, Adjusted R-squared:  0.1496 
## F-statistic: 3.522 on 9 and 120 DF,  p-value: 0.0006673

Mesmo calculando o p-valor usando simulação, o efeito da população se mantém. Como pode ser visto abaixo (em cinza, coeficientes simulados, em vermelho o valor observado para o tamanho da população).

ggplot(tibble(coefs = coefs[, 3]), aes(x = coefs)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = obs[3]), color = "red") +
  ggtitle("Efeito do tamanho da população") +
  ylab("Frequência") +
  xlab("Coeficientes")

munis %>% filter(confirmed > 0) %>% 
ggplot(aes(y = log(confirmed_per_100k_inhabitants+1),
                  x = log(total.pop))) +
  geom_point()

Correlacao com numero de casos (Novas variáveis)

Principais fatores contribuindo para a quantidade de casos, nas cidades afetadas.

reg_N2 <- lm(log(confirmed_per_100k_inhabitants + 1) ~ 
            airport + perc.rural +
              log(eingen.cen.dist) + school.year +
              perc.with.wages + dist.min +
              precTotal + tmean + log(tam_pop_urb) +
              mesoregiao + dist.min.ilh.ssa,
            data = filter(munis, confirmed > 0))
vif(reg_N2)
##                            GVIF Df GVIF^(1/(2*Df))
## airport                1.569591  1        1.252833
## perc.rural             3.329593  1        1.824717
## log(eingen.cen.dist)   2.163480  1        1.470877
## school.year            4.976555  1        2.230819
## perc.with.wages        1.528771  1        1.236435
## dist.min               3.118893  1        1.766039
## precTotal              4.648730  1        2.156091
## tmean                  2.808240  1        1.675780
## log(tam_pop_urb)       4.948147  1        2.224443
## mesoregiao           114.339735  6        1.484282
## dist.min.ilh.ssa       4.320646  1        2.078616
resu2 <- summary(reg_N2)
resu2 
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport + 
##     perc.rural + log(eingen.cen.dist) + school.year + perc.with.wages + 
##     dist.min + precTotal + tmean + log(tam_pop_urb) + mesoregiao + 
##     dist.min.ilh.ssa, data = filter(munis, confirmed > 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37100 -0.47795 -0.01916  0.39335  1.94104 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              4.124e+00  2.579e+00   1.599 0.112601
## airportYES                               3.068e-01  3.127e-01   0.981 0.328703
## perc.rural                              -1.857e-02  5.482e-01  -0.034 0.973036
## log(eingen.cen.dist)                    -1.344e-01  6.243e-02  -2.153 0.033467
## school.year                              5.479e-01  1.584e-01   3.458 0.000768
## perc.with.wages                         -4.859e-01  1.853e+00  -0.262 0.793608
## dist.min                                 5.223e-03  2.274e-03   2.297 0.023472
## precTotal                               -4.052e-05  8.590e-04  -0.047 0.962464
## tmean                                   -1.593e-01  9.073e-02  -1.756 0.081835
## log(tam_pop_urb)                        -2.075e-01  1.181e-01  -1.757 0.081628
## mesoregiaoCentro-Sul Baiano              3.934e-01  2.981e-01   1.320 0.189618
## mesoregiaoExtremo Oeste Baiano           4.951e-01  7.720e-01   0.641 0.522599
## mesoregiaoMetropolitana de Salvador     -3.159e-01  3.518e-01  -0.898 0.371048
## mesoregiaoNordeste Baiano               -4.290e-01  2.832e-01  -1.515 0.132656
## mesoregiaoSul Baiano                     9.187e-01  3.668e-01   2.505 0.013681
## mesoregiaoVale São-Franciscano da Bahia  1.288e+00  4.376e-01   2.943 0.003951
## dist.min.ilh.ssa                        -3.561e-03  8.335e-04  -4.273 4.05e-05
##                                            
## (Intercept)                                
## airportYES                                 
## perc.rural                                 
## log(eingen.cen.dist)                    *  
## school.year                             ***
## perc.with.wages                            
## dist.min                                *  
## precTotal                                  
## tmean                                   .  
## log(tam_pop_urb)                        .  
## mesoregiaoCentro-Sul Baiano                
## mesoregiaoExtremo Oeste Baiano             
## mesoregiaoMetropolitana de Salvador        
## mesoregiaoNordeste Baiano                  
## mesoregiaoSul Baiano                    *  
## mesoregiaoVale São-Franciscano da Bahia ** 
## dist.min.ilh.ssa                        ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7224 on 113 degrees of freedom
## Multiple R-squared:  0.4706, Adjusted R-squared:  0.3956 
## F-statistic: 6.277 on 16 and 113 DF,  p-value: 8.601e-10

Reajustando o modelo 2 para diminuir o vif ou variáveis redundantes (removendo popurbana, distmin,meso e escolaridade)

reg_N3 <- lm(log(confirmed_per_100k_inhabitants + 1) ~ 
            airport + perc.rural +
              log(eingen.cen.dist) +
              perc.with.wages  +
              precTotal + tmean +
               dist.min.ilh.ssa,
            data = filter(munis, confirmed > 0))
vif(reg_N3)
##              airport           perc.rural log(eingen.cen.dist) 
##             1.169427             1.624219             1.287333 
##      perc.with.wages            precTotal                tmean 
##             1.261770             1.268491             1.105814 
##     dist.min.ilh.ssa 
##             1.161556
resu3 <- summary(reg_N3)
resu3 
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport + 
##     perc.rural + log(eingen.cen.dist) + perc.with.wages + precTotal + 
##     tmean + dist.min.ilh.ssa, data = filter(munis, confirmed > 
##     0))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7965 -0.5618 -0.1265  0.5295  1.8302 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.8221712  2.1269787   2.737  0.00712 ** 
## airportYES            0.2087902  0.3019778   0.691  0.49062    
## perc.rural           -0.5308106  0.4283633  -1.239  0.21766    
## log(eingen.cen.dist) -0.1509441  0.0538791  -2.802  0.00592 ** 
## perc.with.wages      -0.0873838  1.8830835  -0.046  0.96306    
## precTotal             0.0013393  0.0005020   2.668  0.00867 ** 
## tmean                -0.1687760  0.0636953  -2.650  0.00912 ** 
## dist.min.ilh.ssa     -0.0022338  0.0004835  -4.620 9.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8082 on 122 degrees of freedom
## Multiple R-squared:  0.2845, Adjusted R-squared:  0.2435 
## F-statistic: 6.932 on 7 and 122 DF,  p-value: 6.047e-07

Modelo somente mesorregiões

reg_N4 <- lm(log(confirmed_per_100k_inhabitants + 1) ~ 
            mesoregiao,
            data = filter(munis, confirmed > 0))

Resultado

resu4 <- summary(reg_N4)
resu4 
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ mesoregiao, 
##     data = filter(munis, confirmed > 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96845 -0.55649 -0.02645  0.49207  1.91919 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               1.8792     0.2008   9.359 4.77e-16
## mesoregiaoCentro-Sul Baiano               0.4831     0.2517   1.919   0.0572
## mesoregiaoExtremo Oeste Baiano           -0.8986     0.5053  -1.778   0.0778
## mesoregiaoMetropolitana de Salvador       0.2289     0.2759   0.830   0.4083
## mesoregiaoNordeste Baiano                -0.2896     0.2839  -1.020   0.3098
## mesoregiaoSul Baiano                      1.0256     0.2367   4.332 3.03e-05
## mesoregiaoVale São-Franciscano da Bahia   0.2833     0.3478   0.815   0.4169
##                                            
## (Intercept)                             ***
## mesoregiaoCentro-Sul Baiano             .  
## mesoregiaoExtremo Oeste Baiano          .  
## mesoregiaoMetropolitana de Salvador        
## mesoregiaoNordeste Baiano                  
## mesoregiaoSul Baiano                    ***
## mesoregiaoVale São-Franciscano da Bahia    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8031 on 123 degrees of freedom
## Multiple R-squared:  0.2877, Adjusted R-squared:  0.253 
## F-statistic: 8.282 on 6 and 123 DF,  p-value: 1.539e-07

Modelo somente estrutura da rede de transporte

reg_N5 <- lm(log(confirmed_per_100k_inhabitants + 1) ~ 
            as.factor(module)+log(eingen.cen.dist)+roles,
            data = filter(munis, confirmed > 0))
vif(reg_N5)
##                          GVIF Df GVIF^(1/(2*Df))
## as.factor(module)    1.335475  5        1.029351
## log(eingen.cen.dist) 1.214092  1        1.101858
## roles                1.502064  3        1.070158

Resultado

resu5 <- summary(reg_N5)
resu5 
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ as.factor(module) + 
##     log(eingen.cen.dist) + roles, data = filter(munis, confirmed > 
##     0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.62071 -0.65374 -0.08344  0.52352  2.05712 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.523754   0.316205   4.819 4.27e-06 ***
## as.factor(module)2    0.465253   0.310408   1.499  0.13654    
## as.factor(module)3    0.100976   0.397312   0.254  0.79982    
## as.factor(module)4    0.168103   0.254437   0.661  0.51008    
## as.factor(module)5    0.816464   0.230969   3.535  0.00058 ***
## as.factor(module)6   -0.008503   0.262474  -0.032  0.97421    
## log(eingen.cen.dist) -0.133580   0.055505  -2.407  0.01762 *  
## roleshub              0.340170   0.494808   0.687  0.49311    
## rolesnetwork hub      1.633375   0.674310   2.422  0.01692 *  
## rolesperipheral      -0.182835   0.214891  -0.851  0.39656    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8574 on 120 degrees of freedom
## Multiple R-squared:  0.2081, Adjusted R-squared:  0.1487 
## F-statistic: 3.504 on 9 and 120 DF,  p-value: 0.0007013

Modelo incluindo renda mensal (month.wages)

reg_N6 <- lm(log(confirmed_per_100k_inhabitants + 1) ~ 
            airport + dist.min.ilh.ssa +
            log(eingen.cen.dist) + 
            perc.with.wages  + month.wages +
            log(precTotal) + tmean,
            data = munis)

Checar a inflação do modelo:

# VIF
vif(reg_N6)
##              airport     dist.min.ilh.ssa log(eingen.cen.dist) 
##             1.224698             1.205719             1.357233 
##      perc.with.wages          month.wages       log(precTotal) 
##             1.145097             1.793628             1.346964 
##                tmean 
##             1.104156
resu6 <- summary(reg_N6)
resu6
## 
## Call:
## lm(formula = log(confirmed_per_100k_inhabitants + 1) ~ airport + 
##     dist.min.ilh.ssa + log(eingen.cen.dist) + perc.with.wages + 
##     month.wages + log(precTotal) + tmean, data = munis)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3021 -0.6624 -0.2519  0.5176  3.3679 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -4.4180348  1.8999086  -2.325 0.020554 *  
## airportYES           -0.1286528  0.3072251  -0.419 0.675620    
## dist.min.ilh.ssa     -0.0019580  0.0003442  -5.688 2.50e-08 ***
## log(eingen.cen.dist)  0.0241005  0.0368376   0.654 0.513338    
## perc.with.wages      -0.4109089  1.1797772  -0.348 0.727804    
## month.wages           0.0021503  0.0004911   4.378 1.53e-05 ***
## log(precTotal)        0.6991059  0.2035196   3.435 0.000655 ***
## tmean                 0.0227614  0.0452863   0.503 0.615516    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.055 on 397 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.2356, Adjusted R-squared:  0.2221 
## F-statistic: 17.48 on 7 and 397 DF,  p-value: < 2.2e-16

AIC

AIC(reg_N,reg_N3,reg_N4, reg_N5, reg_N6)
## Warning in AIC.default(reg_N, reg_N3, reg_N4, reg_N5, reg_N6): models are not
## all fitted to the same number of observations

Correlacao com o tempo até o primeiro caso

covid.day <- get_corona_br(
  dir = "output",
  filename = "corona_brasil",
  cidade = NULL,
  uf = 'BA',
  ibge_cod = NULL,
  by_uf = FALSE
)
covid.day <- data.frame(covid.day, 
                        afetados = ifelse(covid.day$confirmed > 0, 1, 0))
covid.day <- left_join(ibge, centralidade, by = c("cod_ibge" = "ibgecode")) %>% 
  left_join(federal, by = c("cod_ibge" = "ibge")) %>% 
  left_join(covid.day, by = c("cod_ibge" = "city_ibge_code"))  %>% 
  left_join(clima, by = c("cod_ibge" = "ibge")) %>% 
  left_join(meso, by = c("cod_ibge" = "code")) %>% 
  
  mutate(dens.road = ifelse(is.na(dens.road), 0, dens.road),
         afetados = ifelse(is.na(afetados), 0, afetados),
         airport = ifelse(is.na(airport), "NO", airport),
         confirmed = ifelse(is.na(confirmed), 0, confirmed),
         confirmed_per_100k_inhabitants = ifelse(is.na(confirmed_per_100k_inhabitants),
                                                 0,
                                                 confirmed_per_100k_inhabitants))
cod <- unique(covid.day$cod_ibge)
tempo <- data.frame(matrix(ncol = 2, nrow = length(cod)))
colnames(tempo) <- c('cod_ibge', 'tempo_1')
tempo[, 1] <- cod

for(i in 1:length(cod)){
  tab.cod <- data.frame(covid.day[covid.day$cod_ibge == cod[i], ])
  tab.cod <- tab.cod[order(tab.cod$date), ]
  if(sum(tab.cod$confirmed > 0) > 0){ # Mudar de acordo com a qtd de casos
  primeiro <- tab.cod[min(which(tab.cod$confirmed > 0)), 'date']  
  baseline <- as.Date.character('2020-03-06')
  tempo [i, 2] <- as.numeric(difftime(primeiro, baseline, 'days'))
  }
  if(sum(tab.cod$confirmed > 0) == 0){
    tempo [i, 2] <- NA 
  }
}

munis <- merge(munis, tempo, by = 'cod_ibge',
               all.x = T, all.y = F, sort = F)

Principais fatores contribuindo para a quantidade de casos Removi distmin porque é redundante com dis.min.ilh.ssa Removi meso por causa do vif alto

reg_T <- lm(tempo_1 ~ 
              airport +
              log(eingen.cen.dist) + school.year +
              perc.with.wages  +
              precTotal + tmean +
              dist.min.ilh.ssa,
            data = filter(munis, confirmed > 0))
vif(reg_T)
##              airport log(eingen.cen.dist)          school.year 
##             1.215042             1.492712             1.756332 
##      perc.with.wages            precTotal                tmean 
##             1.237328             1.142433             1.159400 
##     dist.min.ilh.ssa 
##             1.098192
resu <- summary(reg_T)
resu 
## 
## Call:
## lm(formula = tempo_1 ~ airport + log(eingen.cen.dist) + school.year + 
##     perc.with.wages + precTotal + tmean + dist.min.ilh.ssa, data = filter(munis, 
##     confirmed > 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.8453  -6.9123  -0.8611   7.9989  25.1059 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          110.740264  27.040035   4.095 7.61e-05 ***
## airportYES            -2.469370   4.079199  -0.605    0.546    
## log(eingen.cen.dist)   0.376812   0.768872   0.490    0.625    
## school.year           -6.178689   1.395587  -4.427 2.09e-05 ***
## perc.with.wages      -21.477892  24.712282  -0.869    0.386    
## precTotal             -0.002585   0.006314  -0.410    0.683    
## tmean                 -0.695397   0.864319  -0.805    0.423    
## dist.min.ilh.ssa      -0.001173   0.006230  -0.188    0.851    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.71 on 122 degrees of freedom
## Multiple R-squared:  0.2631, Adjusted R-squared:  0.2208 
## F-statistic: 6.223 on 7 and 122 DF,  p-value: 3.046e-06

Prever municipios com maior probabilidade de serem afetados

Para isso usamos um random forest.

# data_mod2 <- data_mod %>% na.omit() %>% 
#   mutate(total.pop = log(total.pop),
#          eingen.cen.dist = log(eingen.cen.dist),
#          afetados = as.factor(afetados),
#          airport = as.factor(airport)) 
# 
# reg_log <- randomForest(afetados ~ 
#                  airport + total.pop + perc.rural +
#                  eingen.cen.dist + school.year +
#                  perc.with.wages + dist.min,
#                data = data_mod2, 
#                importance = TRUE)

Fazer um mapa de vulnerabilidade aqui.

Taxa de crescimento dos casos de covid-19

Calcular a taxa

# USAR MESMA ESTRATEGIA DO OUTRO ARTIGO

Gráfico de todos os municipios

cols <- ifelse(levels(covid$city) == "Salvador", "black", "gray")
covid %>%
  ggplot(aes(x = date, y = confirmed, color = city)) +
  scale_color_manual(values = cols) +
  geom_line() +
  ggtitle("Casos de COVID-19 por cidade da Bahia") +
  ylab("Casos confirmados") +
  xlab("Data") +
  theme_classic() +
  theme(legend.position = "none") +
  geom_text(aes(x = date[1] - 8, y = 300, 
                label = "Salvador"), color = "black")

Como a taxa tem variado ao longo do tempo?

covid <- as_tibble(get_corona_br(uf = "BA")) 

Pequenos ajustes na tabela:

covid.ba <- covid %>% 
  filter(place_type == "state") 

covid.ci <- covid %>% 
  filter(place_type == "city") 

Segunda abordagem para crescimento exponencial

gm_mean <- function(x, na.rm = TRUE){
  exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
}
r_calc <- function(x) {
  gm_mean(x[2:length(x)] / x[1:(length(x)-1)])
}

Taxa de crescimento Bahia

casos.ba <- covid.ba$confirmed[nrow(covid.ba):1] #backwards
tempo.ba <- 1:length(casos.ba)
head(covid.ba)
exp.ba<- lm(log(casos.ba)~tempo.ba)
tax.ba<-coef(exp.ba)[2]

r.time.b<- data.frame(tempo=tempo.ba,
           confirmados=casos.ba,
           data=covid.ba$date[nrow(covid.ba):1],
           taxa=NA,
           r.squ=NA,
           taxa2=NA) #segunda abordagem

for (i in 5:length(casos.ba))
{
  exp.temp<-lm(log(casos.ba[1:i])~tempo.ba[1:i])
  r.time.b[i,4]<-coef(exp.temp)[2]
  r.time.b[i,5]<-summary(exp.temp)$r.squared
  r.time.b[i,6]<-r_calc(casos.ba[1:i])
}

Correlação entre as duas abordagens

cor.test(r.time.b[,4], r.time.b[,6])
## 
##  Pearson's product-moment correlation
## 
## data:  r.time.b[, 4] and r.time.b[, 6]
## t = 20.722, df = 46, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9127250 0.9720364
## sample estimates:
##       cor 
## 0.9503889

Taxa de crescimento Bahia

plot(r.time.b$data, r.time.b$taxa,
     bty="l", pch=19, ylim=c(0,0.3),
     xlab="Data", ylab="Taxa Bahia")

Taxa de crescimento Municípios

city.codes<-na.omit(unique(covid.ci$city_ibge_code))
city.n<-length(city.codes)
w=1
list.ci<-list()
for (w in 1:city.n)
{
  covid.temp <- covid.ci %>% 
  filter(city_ibge_code == city.codes[w])
  n.temp<-nrow(covid.temp)
  casos.temp<-covid.temp$confirmed[n.temp:1]
  tempo.temp<-1:n.temp
  
  list.ci[[w]]<-data.frame(tempo=tempo.temp, #tempo
                      casos=casos.temp) #casos
                      
}
names(list.ci)<-city.codes
n.obs.ci<-sapply(list.ci, nrow) #tamanho das séries temporais

#filtrando cidades com no mínimo dez dias com corona
new.list.ci<-list.ci[n.obs.ci>9]

#Ajustado modelo exponencial para cada munícipio
exp.ci<-log(tempo)~casos #equação
mod.exp.ci<-lapply(new.list.ci, lm, formula=exp.ci) #modelo exponencial
coe.exp.ci<-lapply(mod.exp.ci, coef) #coeficientes
r.ci<-sapply(coe.exp.ci, "[", 2) #só a inclinação ("taxa r")
sum.exp.ci<-lapply(mod.exp.ci, summary) #sumário dos modelos
r.squ.ci<-sapply(sum.exp.ci, "[", "r.squared")
r.squ.ci<-unlist(r.squ.ci)

#ajustando na segunda abordagem
r.ci2<-numeric()
for (i in 1:length(new.list.ci))
{
  r.ci2[i]<-r_calc(new.list.ci[[i]][,2])
}


# Montando a planilha

r.mun.dat<-data.frame(city_ibge_code=names(new.list.ci),
           taxa=r.ci,
           r.square=r.squ.ci,
           taxa2=r.ci2)
only80<-r.mun.dat %>% 
  filter(r.square>0.8)
cor.test(only80[,2], only80[,4])
## 
##  Pearson's product-moment correlation
## 
## data:  only80[, 2] and only80[, 4]
## t = NaN, df = 11, p-value = NA
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## cor 
## NaN

Que fatores afetam a taxa de crescimento?

Quando ocorreram os picos?

Qual a meta de quarentena para evitar colapso do sistema?

Mapas

ba.shp<-readOGR("Data/bahia.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/brunovilela/OneDrive/Papers/COVID19-Bahia/Data/bahia.shp", layer: "bahia"
## with 417 features
## It has 2 fields
x.ibge<-match(ba.shp$CD_GEOCMU, munis$cod_ibge)
x.ibge2<-match(ba.shp$CD_GEOCMU, filter(munis, confirmed > 0)$cod_ibge)
ba.shp$mesoregiao<-munis$mesoregiao[x.ibge]
ba.shp$module<-munis$module[x.ibge]
ba.shp$total.pop<-log(munis$total.pop)[x.ibge]
ba.shp$perc.rural<-munis$perc.rural[x.ibge]*100
ba.shp$dist.min.ilh.ssa<-munis$dist.min.ilh.ssa[x.ibge]
ba.shp$precTotal<-log(munis$precTotal)[x.ibge]
ba.shp$afetados<-munis$afetados[x.ibge]
ba.shp$res_casos<-resid(reg_N3)[x.ibge2]
ba.shp$pred_afetados<-predict(reg_log2, type="response")[x.ibge]

sp::spplot(ba.shp, "mesoregiao", main="Messoregiões")
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors
## Warning in Ops.factor(z, at[i]): '>=' not meaningful for factors
## Warning in Ops.factor(z, at[i + 1]): '<' not meaningful for factors

sp::spplot(ba.shp, "module", main="Módulos")

sp::spplot(ba.shp, "total.pop", main="Log Tamanho da População")

sp::spplot(ba.shp, "perc.rural", main="% População Rural")

sp::spplot(ba.shp, "dist.min.ilh.ssa", main="Distância de Aeroportos")

sp::spplot(ba.shp, "afetados", main="Municípios Afetados")

sp::spplot(ba.shp, "res_casos", main="Resíduos Casos")

sp::spplot(ba.shp, "pred_afetados", main="Probabilidade Afetados")